################################################
# Code for Producing Figures in "Niche Worlds"
################################################

#set working directory to wherever this R file is
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

#load libraries
library(ggplot2)
library(scales)
library(lubridate)
library(openxlsx)
library(plyr)
library(dplyr)
library(tidyverse)
library(ggrepel)
library(viridis)
library(hrbrthemes)
library(forecastML)
library(tstools)
library(tidyr)
library(theme_ipsum)
library(reshape)
library(reshape2)

#############
# Figure 1.1
#############

# Read the Korean-market demand data
kr_market <- read.csv("Data/KRMarket_2018_2022.csv", encoding = "UTF-8")

# Build a Date column from YEAR and MONTH, pinned to the first of the month
kr_market$TIME <- as.Date(paste0(kr_market$YEAR, "-", kr_market$MONTH, "-01"))

# For each month (TIME), keep the rows holding the 10 largest DEMAND values
top10_kr <- top_n(
  arrange(group_by(kr_market, TIME), DEMAND, .by_group = TRUE),
  10,
  DEMAND
)

# Keep every row of any title that appeared in a monthly top 10 at least once
top10_kr <- filter(kr_market, TITLE %in% unique(top10_kr$TITLE))

# Within each PARROT_ID, add a row for every month missing between its first
# and last TIME; only TIME and PARROT_ID are set on the added rows
top10_kr_fill <- ungroup(
  complete(
    group_by(top10_kr, PARROT_ID),
    TIME = seq(min(TIME), max(TIME), by = "1 month")
  )
)

# One line per title; everything except Squid Game (including rows whose
# TITLE is NA) is tagged "A" and drawn as a thin grey background line
p <- ggplot(
  mutate(
    top10_kr_fill,
    highlight = dplyr::if_else(
      TITLE == "Squid Game (오징어 게임)",
      "Squid Game (오징어 게임)",
      "A",
      missing = "A"
    )
  ),
  aes(x = TIME, y = DEMAND, group = TITLE, color = highlight, linewidth = highlight)
) +
  geom_line() +
  # unnamed manual values are matched to the highlight levels in order
  scale_color_manual(values = c("lightgrey", "#69b3a2")) +
  scale_linewidth_manual(values = c(0.2, 1.5)) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 14),
    legend.position = "none"
  )
p

ggsave(
  "Figures/Figure1-1.jpeg",
  plot = p, device = "jpg",
  width = 7.5, height = 5.0, dpi = 400
)

#############
# Figure 1.2
#############

# Read the US-market demand data
us_market <- read.csv("Data/USMarket_2018_2022.csv", encoding = "UTF-8")

# Build a Date column from YEAR and MONTH, pinned to the first of the month
us_market$TIME <- as.Date(paste0(us_market$YEAR, "-", us_market$MONTH, "-01"))

# For each month (TIME), keep the rows holding the 10 largest DEMAND values
top10_us <- top_n(
  arrange(group_by(us_market, TIME), DEMAND, .by_group = TRUE),
  10,
  DEMAND
)

# Keep every row of any title that appeared in a monthly top 10 at least once
top10_us <- filter(us_market, TITLE %in% unique(top10_us$TITLE))

# Within each PARROT_ID, add a row for every month missing between its first
# and last TIME; only TIME and PARROT_ID are set on the added rows
top10_us_fill <- ungroup(
  complete(
    group_by(top10_us, PARROT_ID),
    TIME = seq(min(TIME), max(TIME), by = "1 month")
  )
)

# One line per title; everything except Squid Game (including rows whose
# TITLE is NA) is tagged "A" and drawn as a thin grey background line
p <- ggplot(
  mutate(
    top10_us_fill,
    highlight = dplyr::if_else(
      TITLE == "Squid Game (오징어 게임)",
      "Squid Game (오징어 게임)",
      "A",
      missing = "A"
    )
  ),
  aes(x = TIME, y = DEMAND, group = TITLE, color = highlight, linewidth = highlight)
) +
  geom_line() +
  # unnamed manual values are matched to the highlight levels in order
  scale_color_manual(values = c("lightgrey", "#69b3a2")) +
  scale_linewidth_manual(values = c(0.2, 1.5)) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 14),
    legend.position = "none"
  )
p

ggsave(
  "Figures/Figure1-2.jpeg",
  plot = p, device = "jpg",
  width = 7.5, height = 5.0, dpi = 400
)

#############
# Figure 1.3
#############

# Read the Japanese-market demand data
jp_market <- read.csv("Data/JPMarket_2018_2022.csv", encoding = "UTF-8")

# Build a Date column from YEAR and MONTH, pinned to the first of the month
jp_market$TIME <- as.Date(paste0(jp_market$YEAR, "-", jp_market$MONTH, "-01"))

# For each month (TIME), keep the rows holding the 10 largest DEMAND values
top10_jp <- top_n(
  arrange(group_by(jp_market, TIME), DEMAND, .by_group = TRUE),
  10,
  DEMAND
)

# Keep every row of any title that appeared in a monthly top 10 at least once
top10_jp <- filter(jp_market, TITLE %in% unique(top10_jp$TITLE))

# Within each PARROT_ID, add a row for every month missing between its first
# and last TIME; only TIME and PARROT_ID are set on the added rows
top10_jp_fill <- ungroup(
  complete(
    group_by(top10_jp, PARROT_ID),
    TIME = seq(min(TIME), max(TIME), by = "1 month")
  )
)

# One line per title; everything except Squid Game (including rows whose
# TITLE is NA) is tagged "A" and drawn as a thin grey background line
p <- ggplot(
  mutate(
    top10_jp_fill,
    highlight = dplyr::if_else(
      TITLE == "Squid Game (오징어 게임)",
      "Squid Game (오징어 게임)",
      "A",
      missing = "A"
    )
  ),
  aes(x = TIME, y = DEMAND, group = TITLE, color = highlight, linewidth = highlight)
) +
  geom_line() +
  # unnamed manual values are matched to the highlight levels in order
  scale_color_manual(values = c("lightgrey", "#69b3a2")) +
  scale_linewidth_manual(values = c(0.2, 1.5)) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 14),
    legend.position = "none"
  )
p

ggsave(
  "Figures/Figure1-3.jpeg",
  plot = p, device = "jpg",
  width = 7.5, height = 5.0, dpi = 400
)

#############
# Figure 1.4
#############

# Read the global-hits demand data
gl_market <- read.csv("Data/GlobalHits_2018_2022.csv", encoding = "UTF-8")

# Build a Date column from YEAR and MONTH, pinned to the first of the month
gl_market$TIME <- as.Date(paste0(gl_market$YEAR, "-", gl_market$MONTH, "-01"))

# For each month (TIME), keep the rows holding the 10 largest DEMAND values
top10_gl <- gl_market %>%
  group_by(TIME) %>%
  arrange(DEMAND, .by_group = TRUE) %>%
  top_n(10, DEMAND) %>%
  ungroup()

# Keep every row of any title that appeared in a monthly top 10 at least once.
# The titles must come from this section's own top-10 table (top10_gl); the
# previous version filtered on the US table (top10_us), a copy-paste slip that
# made this figure show US top-10 titles and left top10_gl above unused.
top10_gl <- gl_market %>% filter(TITLE %in% unique(top10_gl$TITLE))

# Within each PARROT_ID, add a row for every month missing between its first
# and last TIME; only TIME and PARROT_ID are set on the added rows
top10_gl_fill <- top10_gl %>%
  group_by(PARROT_ID) %>%
  complete(TIME = seq(min(TIME), max(TIME), by = "1 month")) %>%
  ungroup()

# One line per title; everything except Squid Game (including rows whose
# TITLE is NA) is tagged "A" and drawn as a thin grey background line.
# dplyr::if_else() is used so the section does not depend on data.table,
# which this script never attaches.
p <- top10_gl_fill %>%
  mutate(
    highlight = dplyr::if_else(
      TITLE == "Squid Game (오징어 게임)",
      "Squid Game (오징어 게임)",
      "A",
      missing = "A"
    )
  ) %>%
  ggplot(aes(x = TIME, y = DEMAND, group = TITLE, color = highlight, linewidth = highlight)) +
  geom_line() +
  # unnamed manual values are matched to the highlight levels in order
  scale_color_manual(values = c("lightgrey", "#69b3a2")) +
  scale_linewidth_manual(values = c(0.2, 1.5)) +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  )
p

ggsave("Figures/Figure1-4.jpeg", plot = p, device = "jpg", width = 7.5, height = 5.0, dpi = 400)

##############
# Figures 2-4
##############

# Load "global hits by market" data
# ORIGINAL SOURCE DATA FOR THIS FIGURE UNAVAILABLE DUE TO LICENSING RESTRICTIONS
#hits_by_market <- read.csv("../Parrot/AnnotationData/GlobalHitsByMarket.csv", encoding="UTF-8")

# Title of the show to visualize (used as the plot title); keep exactly one active
#show_title <- "Crash Landing on You"
#show_title <- "The Walking Dead"
show_title <- "Squid Game"

# Id of the show to visualize (only needed by the commented-out filter below)
#show_id <- "1ce47d76-98b3-4575-b371-9844c2ab13f7"  #Crash Landing On You
#show_id <- "3d6a2781-4f41-4847-82c0-5f5d6f71b71e"  #Walking Dead
show_id <- "22790f55-321b-4fbe-b3b6-855a25a5ea1d"  #Squid Game

# Filter by specific show [DATA IS ALREADY PRE-FILTERED, SKIP]
#show_demand <- hits_by_market %>% filter(PARROT_ID == show_id)

# Read the pre-filtered demand data for the chosen show; keep exactly one active
#show_demand <- read.csv("Data/Crash_Demand.csv", fileEncoding = "UTF-8")
#show_demand <- read.csv("Data/Walking_Demand.csv", fileEncoding = "UTF-8")
show_demand <- read.csv("Data/Squid_Demand.csv", fileEncoding = "UTF-8")

# First-of-month Date columns for the demand month and the release month
show_demand$DEMAND_TIME <- as.Date(
  paste0(show_demand$YEAR, "-", show_demand$MONTH, "-01")
)
show_demand$RELEASE_TIME <- as.Date(
  paste0(show_demand$RELEASE_YEAR, "-", show_demand$RELEASE_MONTH, "-01")
)

# Keep only rows whose demand month is strictly later than the release month
show_demand <- filter(show_demand, DEMAND_TIME > RELEASE_TIME)

# Treat the market code as a categorical variable
show_demand$MARKET_CODE <- as.factor(show_demand$MARKET_CODE)

# Boxplot (with jittered points) of PERC_OF_TOP per market; the markets are
# put into a fixed order before plotting
p <- ggplot(
  mutate(
    show_demand,
    MARKET_CODE = fct_relevel(
      MARKET_CODE,
      "AR", "MX", "CO", "BR",
      "FR", "ES", "IT", "SE", "DE",
      "EG", "NG",
      "TW", "CN", "JP", "KR", "IN",
      "US", "CA", "GB", "AU"
    )
  ),
  aes(x = PERC_OF_TOP, y = MARKET_CODE, color = MARKET_CODE)
) +
  geom_boxplot() +
  geom_jitter(size = 0.5, width = 0.05, alpha = 0.5) +
  xlim(0, 1) +
  labs(
    title = show_title,
    x = "Percent of the Top In-Demand Show (1 = most in-demand)",
    y = "National Market"
  ) +
  theme(legend.position = "none")
p

ggsave(
  "Figures/Figure2.jpeg",
  plot = p, device = "jpg",
  width = 7.5, height = 6.0, dpi = 400
)

############
# Figure 5
############

# Read the global-hits metadata table
hits_by_market_meta <- read.csv("Data/GLOBAL_HITS_METADATA.csv", encoding = "UTF-8")

# Everything below (and the Figure 6 section) works on `hits_by_market`, but
# the only other assignment to that name in this script is commented out, so
# the section previously stopped with "object 'hits_by_market' not found".
# Use the metadata table that was just loaded.
# NOTE(review): assumes GLOBAL_HITS_METADATA.csv carries the columns used
# below (GENRE_SIMPLE, US_ORIGIN, MARKET_ENTROPY, MARKETS, RELEASE_YEAR,
# COUNTRY_ORIGIN, AVG_MAX_DEMAND, TITLE) -- confirm against the data file.
hits_by_market <- hits_by_market_meta

# Preprocessing: categorical columns as factors
hits_by_market$GENRE_SIMPLE <- as.factor(hits_by_market$GENRE_SIMPLE)
hits_by_market$US_ORIGIN <- as.factor(hits_by_market$US_ORIGIN)

# Drop shows whose MARKET_ENTROPY is NA
hits_by_market <- hits_by_market %>% filter(!is.na(MARKET_ENTROPY))

# Flag shows present in at least 10 markets
hits_by_market$MANY_MARKETS <- (hits_by_market$MARKETS >= 10)

# Flag shows released before 2018
hits_by_market$PRE_2018 <- (hits_by_market$RELEASE_YEAR < 2018)

# Label Japanese, South Korean and Indian shows; every other origin gets NA
hits_by_market <- hits_by_market %>%
  mutate(three_country = case_when(COUNTRY_ORIGIN == "Japan" ~ "Japan",
                                   COUNTRY_ORIGIN == "South Korea" ~ "Korea",
                                   COUNTRY_ORIGIN == "India" ~ "India"))
# Fix the level order of the country label
hits_by_market$three_country <- factor(hits_by_market$three_country, levels = c("Japan", "Korea", "India"))

# Fractional ranks (rank / number of rows, in (0, 1]) for both plotted
# variables; ties are broken by order of appearance
hits_by_market <- hits_by_market %>%
  mutate(rank_var1 = rank(AVG_MAX_DEMAND, ties.method = "first") / n(),
         rank_var2 = rank(MARKET_ENTROPY, ties.method = "first") / n())

# Shows in the top quartile on both dimensions
top_quartile <- hits_by_market %>%
  filter(rank_var1 > 0.75, rank_var2 > 0.75)

# Top quadrant; note the two ranks use different thresholds
top_shows <- hits_by_market %>%
  filter(rank_var1 > 0.95, rank_var2 > 0.75)

# Logical flags selecting the titles to label (group: this figure,
# group2: the Japan / South Korea labels used by Figure 6)
hits_by_market$group <- (hits_by_market$AVG_MAX_DEMAND >= 0.7 & hits_by_market$MARKET_ENTROPY >= 4)
hits_by_market$group2 <- (hits_by_market$AVG_MAX_DEMAND >= 0.5 & hits_by_market$MARKET_ENTROPY >= 4) & (hits_by_market$COUNTRY_ORIGIN=="Japan" | hits_by_market$COUNTRY_ORIGIN=="South Korea")

# Scatterplot of all shows, with the top quartile overplotted in red and the
# `group` titles labelled
p <- ggplot(data = hits_by_market, aes(x = AVG_MAX_DEMAND, y = MARKET_ENTROPY)) +
  geom_point(alpha = .5, size = .5) + #, size=1) + # use .5 if your sizing nodes, 1 otherwise
  geom_point(data = top_quartile, aes(x = AVG_MAX_DEMAND, y = MARKET_ENTROPY), size = .5, color = 'red') +  # Highlight top quartile points
  # `group` is already logical, so filter on it directly instead of comparing
  # it with the string "TRUE"
  geom_text_repel(data = filter(hits_by_market, group), max.overlaps = 25, aes(label = TITLE), size = 2) + #was 1.5
  xlab("Higher Average Maximum Demand across Markets ---- >") +
  ylab("More Consistent Relative Demand across Markets ---- >") +
  ylim(0, 8) +
  # "none" is the documented value for hiding the legend; the empty string
  # used before is not a documented position
  theme(legend.position = "none",
        axis.text = element_text(size = 12),
        axis.title.x = element_text(size = 12, face = "bold"),
        axis.title.y = element_text(size = 12, face = "bold"),
        title = element_blank()) #,face="bold"))
p

ggsave("Figures/Figure5.jpeg", plot = p, device = "jpg", width = 7.5, height = 5.0, dpi = 400)

############
# Figure 6
############

# FIRST run the previous code for Figure 5: this section reuses
# `hits_by_market` together with its `three_country` and `group2` columns

# Scatterplot of all shows with Japan, Korea, and India colored
p <- ggplot(data = hits_by_market, aes(x = AVG_MAX_DEMAND, y = MARKET_ENTROPY)) +
  # First layer: all points, semi-transparent grey (no color mapping)
  geom_point(alpha = 0.5, size = .5, color = "gray50") +
  # Second layer: only points with a three_country value (opaque, colored)
  geom_point(data = filter(hits_by_market, !is.na(three_country)),
             aes(color = three_country), alpha = 1, size = 1) +
  # `group2` is already logical, so filter on it directly instead of comparing
  # it with the string "TRUE"
  geom_text_repel(data = filter(hits_by_market, group2), max.overlaps = 25, aes(label = TITLE), size = 2) + #was 1.5
  xlab("Higher Average Maximum Demand across Markets ---- >") +
  ylab("More Consistent Relative Demand across Markets ---- >") +
  ylim(0, 8) +
  # "none" is the documented value for hiding the legend; the empty string
  # used before is not a documented position
  theme(legend.position = "none",
        axis.text = element_text(size = 12),
        axis.title.x = element_text(size = 12, face = "bold"),
        axis.title.y = element_text(size = 12, face = "bold"),
        title = element_blank()) #,face="bold"))
p

ggsave("Figures/Figure6.jpeg", plot = p, device = "jpg", width = 7.5, height = 5.0, dpi = 400)

############
# Figure 7
############

# Read the tweet counts for multiple shows
AllTweets <- read.csv("Data/TweetCountsMultShows.csv")

# Parse `start` as a date-time, then floor it to the start of its month
AllTweets$start <- as.POSIXct(AllTweets$start)
AllTweets$year_month <- floor_date(AllTweets$start, "month")

# Shows to plot ("Game Of Thrones" is currently commented out)
selected_shows <- c(
  "Squid Game", "Stranger Things", #"Game Of Thrones"
  "Money Heist", "Attack On Titan", "Better Call Saul",
  "The Walking Dead", "Westworld", "Supernatural", "Grey's Anatomy"
)

# Monthly activity per selected show within the 2018-2022 window.
# Activity is the tweet count only (retweets are not added in).
# `title2` duplicates `title` so the background layer can group by show
# after `title` (the facetting column) has been removed from its data.
don <- AllTweets %>%
  filter(
    title %in% selected_shows,
    year_month >= as.POSIXct("2018-01-01"),
    year_month <= as.POSIXct("2022-12-31")
  ) %>%
  group_by(year_month, title) %>%
  dplyr::summarise(total_activity = sum(tweets), .groups = "drop") %>%
  mutate(title2 = title)

# Squid Game rows only
squid_game <- filter(don, title == "Squid Game")

# One facet per show. Layers, bottom to top:
#   1. every show's series in grey (data without `title`)
#   2. the Squid Game series in red (data without `title`)
#   3. the series of the facet's own show in green
p <- ggplot(don, aes(x = year_month, y = total_activity)) +
  geom_line(
    data = dplyr::select(don, -title),
    aes(group = title2),
    color = "grey", linewidth = 0.5, alpha = 0.5
  ) +
  geom_line(
    data = dplyr::select(squid_game, -title),
    color = "red", linewidth = 0.8, alpha = 0.7
  ) +
  geom_line(aes(color = title), color = "#69b3a2", linewidth = 1.2) +
  # show the y axis in thousands
  scale_y_continuous(labels = label_number(suffix = "k", scale = 1e-3)) +
  facet_wrap(~title) +
  #ggtitle("TV Show Twitter Activity (2018-2022)") +
  labs(x = "", y = "") +
  theme_ipsum() +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(size = 14),
    legend.position = "none"
  )

print(p)

# Write the figure to disk
ggsave("Figures/Figure7.jpg", plot = p, width = 12, height = 8, dpi = 400)

###############
# Figures 8-10
###############

###################
# Load the English reference-tweet data
cite_counts <- read.csv("Data/all_ref_tweets_with_timetags_EN.csv", encoding = "UTF-8")

# Parse the date column
cite_counts$date <- as.Date(cite_counts$date)
# Keep rows dated strictly between 2021-09-01 and 2021-11-15
cite_counts <- cite_counts %>%
  filter(date > as.Date("2021-09-01"), date < as.Date("2021-11-15"))

# Keep the identifier columns plus one column per referenced title, then
# reshape to long format (one row per tweet per title)
top_cites <- cite_counts[c("username","date","likes","Alice","BattleRoyale","Saw","Kaiji","HungerGames","Parasite","GodsWill","SweetHome","Danganronpa","MyName")]
# reshape2::melt is named explicitly because both reshape and reshape2 are
# attached and each exports melt()
top_cites <- reshape2::melt(top_cites, id.vars = c("username", "date", "likes"))

# Daily totals per referenced title. dplyr::summarize is named explicitly,
# as in the Japanese and Korean sections, because plyr is attached as well.
top_cites <- top_cites %>%
  group_by(date, variable) %>%
  dplyr::summarize(n = sum(value), .groups = "drop")

# Stacked area chart of daily reference counts
e <- ggplot(data = top_cites, aes(x = date, y = n, fill = variable)) +
  # line width is set with `linewidth` (as in the Korean section); `size` is
  # deprecated for lines in current ggplot2
  geom_area(alpha = 0.6, linewidth = .5, colour = "white") +
  ggtitle("Squid Game is like...", subtitle = "English Tweet Data") +
  theme_ipsum() +
  ylab("Number of References") +
  xlab("Date") +
  labs(fill = "Show Title") +
  theme(plot.subtitle = element_text(face = "italic"))
e

ggsave("Figures/Figure8.jpeg", plot = e, device = "jpg", width = 7.5, height = 5.0, dpi = 400)

####################
# Load the Japanese reference-tweet data
cite_counts <- read.csv("Data/all_ref_tweets_with_timetags_JP.csv", encoding = "UTF-8")

# Parse the date column
cite_counts$date <- as.Date(cite_counts$date)
# Keep rows dated strictly between 2021-09-01 and 2021-11-15
cite_counts <- cite_counts %>%
  filter(date > as.Date("2021-09-01"), date < as.Date("2021-11-15"))

# Keep the identifier columns plus one column per referenced title, then
# reshape to long format (one row per tweet per title)
top_cites <- cite_counts[c("username","date","likes","Kaiji","GodsWill","Alice","LiarGame","MyName","BattleRoyale","Parasite","Saw","SweetHome","Cube")]
# reshape2::melt is named explicitly because both reshape and reshape2 are
# attached and each exports melt()
top_cites <- reshape2::melt(top_cites, id.vars = c("username", "date", "likes"))

# Daily totals per referenced title
top_cites <- top_cites %>%
  group_by(date, variable) %>%
  dplyr::summarize(n = sum(value), .groups = "drop")

# Stacked area chart of daily reference counts
j <- ggplot(data = top_cites, aes(x = date, y = n, fill = variable)) +
  # line width is set with `linewidth` (as in the Korean section); `size` is
  # deprecated for lines in current ggplot2
  geom_area(alpha = 0.6, linewidth = .5, colour = "white") +
  ggtitle("Squid Game is like...", subtitle = "Japanese Tweet Data") +
  theme_ipsum() +
  ylab("Number of References") +
  xlab("Date") +
  labs(fill = "Show Title") +
  theme(plot.subtitle = element_text(face = "italic"))
j

ggsave("Figures/Figure9.jpeg", plot = j, device = "jpg", width = 7.5, height = 5.0, dpi = 400)

#################
# Load the Korean reference-tweet data
cite_counts <- read.csv("Data/all_ref_tweets_with_timetags_KR.csv", encoding = "UTF-8")

# Parse the date column
cite_counts$date <- as.Date(cite_counts$date)

# Keep rows dated strictly between 2021-09-01 and 2021-11-15
cite_counts <- filter(cite_counts, date > "2021-09-01", date < "2021-11-15")

# Identifier columns plus one column per referenced title
top_cites <- cite_counts[c(
  "username", "date", "likes",
  "Parasite", "MyName", "Kaiji", "BattleRoyale",
  "SweetHome", "GodsWill", "Alice", "HungerGames"
)]

# Long format: one row per tweet per referenced title
top_cites <- melt(top_cites, id = c("username", "date", "likes"))

# Daily totals per referenced title
top_cites <- dplyr::summarize(
  group_by(top_cites, date, variable),
  n = sum(value)
)

# Stacked area chart of daily reference counts
k <- ggplot(data = top_cites, aes(x = date, y = n, fill = variable)) +
  geom_area(alpha = 0.6, linewidth = .5, colour = "white") +
  labs(
    title = "Squid Game is like...",
    subtitle = "Korean Tweet Data",
    x = "Date",
    y = "Number of References",
    fill = "Show Title"
  ) +
  theme_ipsum() +
  theme(plot.subtitle = element_text(face = "italic"))
k

ggsave(
  "Figures/Figure10.jpeg",
  plot = k, device = "jpg",
  width = 7.5, height = 5.0, dpi = 400
)

################################################
# Figures 11-12: See User_Data_Analysis.ipynb
################################################

#############
# Figure 13
#############

# Load the per-show user concentration data
concentration_df <- read.csv("Data/UserConcentration.csv")

# Sort by Pct_In_Top10 in descending order, so rank 1 is the show with the
# highest value
concentration_df <- concentration_df %>% arrange(desc(Pct_In_Top10))
# seq_len() stays correct (empty) for a zero-row table, unlike 1:nrow()
concentration_df$rank <- seq_len(nrow(concentration_df))

# The 3 shows with the lowest Pct_In_Top10 (re-sorted ascending to pick them)
least_concentrated <- concentration_df %>%
  arrange(Pct_In_Top10) %>%
  head(3) %>%
  pull(Show)

# Shows to highlight and label
highlight_shows <- c('Squid Game', 'Korean Romance or Historical',
                     'Korean Zombie or Horror', 'Jujutsu Kaisen',
                     least_concentrated, 'One Piece', 'My Hero Academia')

# Per-show color and label columns
concentration_df <- concentration_df %>%
  mutate(color = case_when(
    Show == 'Squid Game' ~ 'red',
    grepl('Korean', Show) ~ 'orange',
    Show %in% highlight_shows ~ 'darkblue',
    TRUE ~ 'lightgray'
  ),
  show_label = ifelse(Show %in% highlight_shows, Show, NA))

# Color lookup (show name -> color) for the highlighted shows, built from the
# `color` column above. The previous version hard-coded the names of three
# least-concentrated shows, which left them without a color whenever the data
# selected different shows.
highlighted <- filter(concentration_df, Show %in% highlight_shows)
highlight_colors <- setNames(highlighted$color, highlighted$Show)
highlight_colors <- highlight_colors[!duplicated(names(highlight_colors))]

# Build the plot
p <- ggplot(concentration_df, aes(x = Pct_In_Top10, y = rank)) +
  # Non-highlighted shows in gray
  geom_point(data = filter(concentration_df, !Show %in% highlight_shows),
             color = 'lightgray', size = 2, alpha = 0.6) +
  # Highlighted points
  geom_point(data = highlighted,
             aes(color = Show), size = 4) +
  # Labels for highlighted shows, placed to the right of their points
  geom_text(data = highlighted,
            aes(label = Show, color = Show),
            hjust = -0.1, size = 4, show.legend = FALSE) +
  # Manual colors, keyed by show name
  scale_color_manual(values = highlight_colors) +
  # Reference line at 50%
  geom_vline(xintercept = 50, linetype = 'dashed', alpha = 0.5) +
  # Fix the x-axis range to 40-86
  xlim(40, 86) +
  # Labels
  labs(x = '% of User Activity in Top 10 Other Subreddits',
       y = 'Subreddits (ranked by concentration)') +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 14),
        legend.position = 'none',
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
p

# Pass the plot explicitly, as the other sections do, instead of relying on
# last_plot()
ggsave("Figures/Figure13.jpg", plot = p, dpi = 400, width = 10, height = 8, units = "in")

###############################################
# Figure 14: See Topic_Model_Analysis.ipynb
###############################################

#############
# Figure 15
#############

# Read in the per-show centrality metrics
centrality_df <- read.csv('Data/show_centrality_metrics.csv')

# Medians of both metrics; they position the dashed quadrant lines
close_med <- median(centrality_df$closeness_centrality)
eigen_med <- median(centrality_df$eigenvector_centrality)

# Shows to label
shows_to_label <- c("Squid Game", "Game of Thrones", "SpongeBob SquarePants",
                    "Bridgerton", "The 100", "13 Reasons Why", "The Witcher",
                    "Jujutsu Kaisen", "The Falcon and The Winter Soldier",
                    "Korean Zombie or Horror")

# Label column: the show name for selected shows, NA otherwise
centrality_df <- centrality_df %>%
  mutate(label = ifelse(show %in% shows_to_label, show, NA))

# Quadrant colors, passed to scale_fill_manual() below.
# NOTE(review): no layer in this plot maps `fill` to data, so these values
# do not appear to be used by the figure -- confirm whether they are needed.
quadrant_colors <- c(
  'mainstream_generalist' = 'lightgreen',
  'mainstream_specialist' = 'lightcoral',
  'niche_generalist' = 'lightblue',
  'niche_specialist' = 'lightyellow'
)

p <- ggplot(centrality_df, aes(x = closeness_centrality, y = eigenvector_centrality)) +

  # Quadrant lines at the two medians
  geom_hline(yintercept = eigen_med, linetype = "dashed", color = "red", alpha = 0.7) +
  geom_vline(xintercept = close_med, linetype = "dashed", color = "red", alpha = 0.7) +
  # Fix the x-axis range to 8-13
  xlim(8, 13) +

  # Points - uniform size
  geom_point(color = "black", alpha = 0.7, size = 2) +

  # Highlight Squid Game. Shape 21 is a filled circle that honors `fill`; the
  # default point shape ignores `fill`, so the red fill was previously not
  # drawn and the point looked like every other one.
  geom_point(data = filter(centrality_df, show == "Squid Game"),
             shape = 21, fill = "red", color = "black", alpha = 1, size = 2) +

  # Repelled text boxes for the selected shows (rows with an NA label are
  # dropped silently via na.rm)
  geom_label_repel(aes(label = label),
                   na.rm = TRUE,
                   size = 4,
                   alpha = 0.8,
                   label.padding = unit(0.15, "lines"),
                   box.padding = unit(0.35, "lines"),
                   point.padding = unit(0.3, "lines"),
                   segment.color = "black",
                   segment.size = 0.3,
                   min.segment.length = 0,
                   max.overlaps = Inf) +

  # Quadrant labels. Each is anchored at an x-axis limit (8 or 13) and placed
  # 10% of the way from the eigenvector median towards the data maximum
  # (upper labels) or minimum (lower labels).
  annotate("label",
           x = 13,
           y = eigen_med + (max(centrality_df$eigenvector_centrality) - eigen_med) * 0.1,
           label = "Mainstream\nGeneralists",
           hjust = 1, vjust = 0,
           size = 5, fontface = "italic",
           fill = "lightyellow", alpha = 0.6) +

  annotate("label",
           x = 8,
           y = eigen_med + (max(centrality_df$eigenvector_centrality) - eigen_med) * 0.1,
           label = "Mainstream\nSpecialists",
           hjust = 0, vjust = 0,
           size = 5, fontface = "italic",
           fill = "lightyellow", alpha = 0.6) +

  annotate("label",
           x = 13,
           y = eigen_med - (eigen_med - min(centrality_df$eigenvector_centrality)) * 0.1,
           label = "Focused\nGeneralists",
           hjust = 1, vjust = 1,
           size = 5, fontface = "italic",
           fill = "lightyellow", alpha = 0.6) +

  annotate("label",
           x = 8,
           y = eigen_med - (eigen_med - min(centrality_df$eigenvector_centrality)) * 0.1,
           label = "Focused\nSpecialists",
           hjust = 0, vjust = 1,
           size = 5, fontface = "italic",
           fill = "lightyellow", alpha = 0.6) +

  scale_fill_manual(values = quadrant_colors, guide = "none") +

  labs(x = "Closeness Centrality",
       y = "Eigenvector Centrality") +

  #theme_minimal() +
  theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5))

# Display the plot
print(p)

# Save the plot
ggsave("Figures/Figure15.jpg", plot = p, width = 12, height = 8, dpi = 400)

###############################################
# Figures 16-19: See Sentiment_Analysis.ipynb
###############################################


